*********************************************************************************************************

	 *REPLICATION MATERIAL FOR: "EDUCATION AND THE PARADOX OF PERCEIVED DISCRIMINATION"
	 
	 * MARGARITA GELEPTHIS AND MARCO GIANI, WORLD POLITICS
	 
*********************************************************************************************************
	 


*** Note: replicators should add to the local ado folder (identify it using the command "sysdir"): 
*a. the DCdensity.ado file
*** (available here: https://eml.berkeley.edu/~jmccrary/DCdensity/) 
*b. the rddisttestk.ado file 
*** (available here: https://economics.byu.edu/frandsen/Pages/Software.aspx) 
*c. the ado, MO and SCML files for the rdrobust package (Version 8.0.3, released 06-04-2020). Files are stored in the "RDrobust" folder, see README for more info.
*** (Github commit 07-08-2020. Available here: https://github.com/rdpackages/rdrobust/tree/2589686d980af0eb2c0edd945d4a43e8bc081872/stata). See also: https://rdpackages.github.io/rdrobust/

 
***Note: the following packages can be installed by uncommenting this section. The list should be complete; we apologize if any additional package is flagged during the replication.
/*
ssc install ivtreatreg, replace
ssc install center, replace
ssc install estout, replace
ssc install grstyle, replace
ssc install coefplot, replace
ssc install rddsga, replace
ssc install kountry, replace
ssc install iscogen, replace 
ssc install rangestat, replace
ssc install rangejoin, replace 
ssc install nearmrg, replace
*/


***Note: replicators should set directory to open data 
*cd ""


*set scheme for figures' style
 
 
* The first step is to download rounds 1-9 ESS data from  https://www.europeansocialsurvey.org/data-portal

* Note that ESS sometimes publishes updates of existing datasets, which may slightly change some estimates. We use the following versions: ESS1e06_6, ESS2e03_6, ESS3e03_7, ESS4e04_5, ESS5e03_4, ESS6e02_4, ESS7e02_2, ESS8e02_1, ESS9e01_2

* To get a smaller dataset, one can optionally use

*npresent, min(`=ceil(_N/10)')
*keep `r(varlist)'

********** CLEANING DATA **********

* PART 1: Define treatment and running variable following Cavaille and Marshall (2019)


clear all

* NOTE(review): no dataset is opened between -clear all- and -kountry- in this
* file. Presumably the pooled ESS rounds 1-9 data must be loaded here (e.g.
* with -use-) before the commands below can run -- confirm against the README.

*harmonize country names
* kountry writes the standardized country name to NAMES_STD, which is then
* renamed to country.

kountry cntry, from(iso2c)  geo(un)  
rename NAMES_STD country


* create year labels 
* ESS rounds 1-9 map to survey years 2002, 2004, ..., 2018; any other value of
* essround leaves year missing.

generate year = 2000 + 2*essround if inlist(essround, 1, 2, 3, 4, 5, 6, 7, 8, 9)
label var year "Survey year"

* Calendar year in which the respondent turned 8, 9, ..., 15 (missing when
* yrbrn is missing).
forvalues n = 8/15 {
	generate yearat`n' = yrbrn + `n'
	label var yearat`n' "Year respondent was aged `n'"
}

* plusN = 1 if the respondent reports more than N-1 years of schooling, 0 otherwise.
* Stata treats missing values as larger than any number, so a bare
* "eduyrs > N-1" is TRUE when eduyrs is missing. The indicator is therefore
* generated only where eduyrs is observed, so that respondents with unknown
* schooling stay missing instead of being coded 1.
forvalues n = 5/14 {
	local below = `n' - 1
	generate plus`n' = (eduyrs > `below') if !missing(eduyrs)
	label var plus`n' "Indicator for respondent obtaining `n' years of schooling"
}


save Data.dta, replace


* GB has two reforms. The GB observations are written to a separate file,
* flagged with second_reform = 1, and then appended back to the main file, so
* that each GB respondent appears once per reform.

clear all 
use if cntry=="GB" using Data.dta
generate second_reform = 1 
save UK.dta, replace

* Combine both files: the original rows (second_reform = 0) plus the duplicated
* GB rows (second_reform = 1). The two reforms only apply to GB, not the whole UK.

clear all
use Data.dta
append using "UK.dta"
* Rows coming from the original file have no second_reform value after the append.
replace second_reform = 0 if missing(second_reform)
label variable second_reform "Indicator for GB's second reform"
save Data.dta, replace
 


*** Define reform treatments
* treatment = 1 when the year in which the respondent reached the reform-relevant
* age is at or after the country's (or region's) cutoff year; 0 otherwise.

generate treatment = 0

* Country-level reforms, one rule per country: "ISO2 age cutoff-year".
foreach rule in "AT 14 1966" "BE 14 1983" "NL 15 1974" "FR 14 1967" ///
		"DK 14 1958" "IE 14 1967" "IT 11 1963" "ES 12 1970" ///
		"IS 14 1975" "SE 14 1965" "GR 12 1976" "CZ 14 1961" ///
		"SK 14 1961" "PL 14 1961" "NO 12 1959" "CH 12 1956" ///
		"PT 8 1964" "RU 14 1959" "LT 14 1959" "SI 8 1959" ///
		"EE 14 1959" "UA 14 1959" {
	local iso : word 1 of `rule'
	local age : word 2 of `rule'
	local cut : word 3 of `rule'
	replace treatment = 1 if yearat`age' >= `cut' & cntry == "`iso'"
}

* GB: two reforms, told apart by second_reform; regiongb == 12 is excluded.
replace treatment = 1 if cntry == "GB" & regiongb != 12 & second_reform == 0 & yearat14 >= 1947
replace treatment = 1 if cntry == "GB" & regiongb != 12 & second_reform == 1 & yearat15 >= 1972

* Croatia is matched on the harmonized country name rather than on cntry.
replace treatment = 1 if yearat8 >= 1952 & country == "Croatia"

*regional FI: cutoff year for regionfi = 1, ..., 5
local fi_cutoffs 1969 1968 1966 1965 1964
forvalues r = 1/5 {
	local cut : word `r' of `fi_cutoffs'
	replace treatment = 1 if yearat13 >= `cut' & regionfi == `r' & cntry == "FI"
}

*regional DE: cutoff year for regionde = 1, ..., 10
local de_cutoffs 1955 1948 1961 1957 1967 1967 1967 1967 1969 1963
forvalues r = 1/10 {
	local cut : word `r' of `de_cutoffs'
	replace treatment = 1 if yearat14 >= `cut' & regionde == `r' & cntry == "DE"
}
* regionde 12-16 share the 1990 cutoff, but the statement after this one resets
* regionde 11-16 to missing, so this assignment does not survive.
replace treatment = 1 if yearat14 >= 1990 & inlist(regionde, 12, 13, 14, 15, 16) & cntry == "DE"
replace treatment = . if inrange(regionde, 11, 16) & cntry == "DE"

label variable treatment "Indicator for respondents affected by a country's schooling reform"


*** Define running variables
* running = (year the respondent reached the reform-relevant age) - (cutoff year).
* It stays missing for observations not covered by any rule below.

generate running = .

* GB: two reforms, told apart by second_reform; regiongb == 12 is excluded.
replace running = yearat15 - 1972 if cntry == "GB" & second_reform == 1 & regiongb != 12
replace running = yearat14 - 1947 if cntry == "GB" & second_reform == 0 & regiongb != 12

* Country-level reforms, one rule per country: "ISO2 age cutoff-year".
foreach rule in "AT 14 1966" "NL 15 1974" "FR 14 1967" "BE 14 1983" ///
		"DK 14 1958" "IE 14 1967" "IT 11 1963" "ES 12 1970" ///
		"SE 14 1965" "GR 12 1976" "PT 8 1964" "CZ 14 1961" ///
		"SK 14 1961" "LT 14 1959" "PL 14 1961" "NO 12 1959" ///
		"CH 12 1956" "IS 14 1975" "SI 8 1959" "EE 14 1959" ///
		"UA 14 1959" "RU 14 1959" {
	local iso : word 1 of `rule'
	local age : word 2 of `rule'
	local cut : word 3 of `rule'
	replace running = yearat`age' - `cut' if cntry == "`iso'"
}

* Regional FI: cutoff year for regionfi = 1, ..., 5
local fi_cutoffs 1969 1968 1966 1965 1964
forvalues r = 1/5 {
	local cut : word `r' of `fi_cutoffs'
	replace running = yearat13 - `cut' if regionfi == `r' & cntry == "FI"
}

* Regional DE: cutoff year for regionde = 1, ..., 10 (other regions stay missing)
local de_cutoffs 1955 1948 1961 1957 1967 1967 1967 1967 1969 1963
forvalues r = 1/10 {
	local cut : word `r' of `de_cutoffs'
	replace running = yearat14 - `cut' if regionde == `r' & cntry == "DE"
}

label variable running "Number of cohorts either side of schooling reform"


*** Label country-reforms
* reform is a string identifying the country-reform an observation belongs to.
* It is only filled in where yearat14 is not missing.
* NOTE(review): for some countries the year in the label differs from the
* cutoff year used for treatment/running (e.g. PL 1967 vs 1961, SE 1962 vs
* 1965) -- confirm which year each label is meant to show.

generate reform = ""

* GB has one label per reform; regiongb == 12 is left unlabelled.
replace reform = "GB (1947)" if cntry == "GB" & yearat14 != . & regiongb != 12 & second_reform == 0
replace reform = "GB (1972)" if cntry == "GB" & yearat14 != . & regiongb != 12 & second_reform == 1

* Every other label starts with the two-letter country code it applies to.
foreach tag in "DK (1958)" "FR (1967)" "NL (1974)" "SE (1962)" ///
		"BE (1983)" "DE (varies by region)" "FI (varies by region)" ///
		"IE (1967)" "IT (1963)" "ES (1969)" "AT (1966)" "GR (1975)" ///
		"PT (1964)" "CZ (1961)" "SK (1961)" "PL (1967)" "RU (1959)" ///
		"SI (1959)" "UA (1959)" "EE (1959)" "LT (1959)" "IS (1974)" ///
		"NO (1959+)" "CH (1956+)" {
	local iso = substr("`tag'", 1, 2)
	replace reform = "`tag'" if cntry == "`iso'" & yearat14 != .
}

* Keep only observations that belong to an analyzed reform.
keep if running != .

label variable reform "Country-reform"


*** Apply age and year of birth restrictions
* NOTE(review): Stata treats missing as larger than any number, so both
* conditions below also keep observations with missing agea / yearat15.
* Missing yearat15 is removed by the next step; missing agea is not handled
* separately -- confirm this is intended.
keep if agea >18
keep if yearat15 >= 1930

*** Drop if cohort is not known
drop if yearat15==.

*** Drop if new immigrant
* Applies to respondents with brncntr==2 (presumably foreign-born; verify against the ESS codebook).
* 1) drop those with a non-missing livecntr;
* 2) among the rest, Age_migration = livecnta - yrbrn; drop those with a
*    negative value or a value above 6.
* NOTE(review): a missing Age_migration also satisfies "> 6", so brncntr==2
* respondents with missing livecnta are dropped as well -- confirm intended.

drop if livecntr!=. & brncntr==2
gen Age_migration=livecnta-yrbrn if brncntr==2
drop if Age_migration<0
drop if Age_migration>6 &  brncntr==2

*** Recode the education variable to cap at 13 years (per the original note, the
*** point after which secondary schooling ends)
* total_eduyrs keeps the uncapped value. The cap is restricted to non-missing
* eduyrs: missing values compare as larger than 13 in Stata and would otherwise
* be recoded to 13 years of schooling.

g total_eduyrs = eduyrs
label var total_eduyrs "Years of education (total)"
replace eduyrs = 13 if eduyrs>13 & !missing(eduyrs)

*** Demean years of completed education [necessary for RD estimator below]
* eduyrs_actual keeps the capped, un-centered value; eduyrs becomes the
* deviation from the sample mean. r(mean) is used directly, not through macro
* expansion, so the mean is subtracted at full precision.
g eduyrs_actual = eduyrs
label var eduyrs_actual "Years of education (capped at 13)"
sum eduyrs, det
replace eduyrs = eduyrs - r(mean)

save Data.dta, replace
